library(Rcpp)
cppFunction('
int cpp_gcd(int a, int b) {
while (b != 0) {
int temp = b;
b = a % b;
a = temp;
}
return abs(a);
}
')
cpp_gcd(20, 25)[1] 5
rational classFunction for GCD
library(Rcpp)
cppFunction('
int cpp_gcd(int a, int b) {
while (b != 0) {
int temp = b;
b = a % b;
a = temp;
}
return abs(a);
}
')
cpp_gcd(20, 25)[1] 5
Function for LCM
cppFunction('
int cpp_lcm(int a, int b) {
int x = a;
int y = b;
while (y != 0) {
int temp = y;
y = x % y;
x = temp;
}
return abs(a * b) / abs(x);
}
')
# Test the functions
cpp_lcm(20, 25) # Should return 100[1] 100
# Define the rational S4 class
setClass("rational",
slots = list(numerator = "numeric",
denominator = "numeric"))
# Constructor
rational <- function(numerator, denominator = 1) {
# if (denominator == 0) {
# stop("Denominator cannot be zero.")
# }
obj <- new("rational", numerator = numerator, denominator = denominator)
validObject(obj) # Check validity
return(obj)
}
# Validator
setValidity("rational", function(object) {
if (object@denominator == 0) {
return("Denominator cannot be zero.")
}
TRUE
})Class "rational" [in ".GlobalEnv"]
Slots:
Name: numerator denominator
Class: numeric numeric
# Show method
setMethod("show", "rational", function(object) {
cat(object@numerator, "/", object@denominator, "\n")
})
# Simplify method
setGeneric("simplify", function(object) standardGeneric("simplify"))[1] "simplify"
setMethod("simplify", "rational", function(object) {
a <- abs(object@numerator)
b <- abs(object@denominator)
gcd <- cpp_gcd(a, b)
# Simplify by devide gcd
new("rational", numerator = object@numerator / gcd, denominator = object@denominator / gcd)
})
# Quotient method
setGeneric("quotient", function(object, digit = 2) standardGeneric("quotient"))[1] "quotient"
setMethod("quotient", "rational", function(object, digit = 2) {
if (digit %% 1 != 0) {
stop("Digits must be a non-negative number.")
}
result <- object@numerator / object@denominator
format_string <- paste0("%.", digit, "f")
round_r <- sprintf(format_string,round(result, digit))
print(round_r)
return(result)
})
# Arithmetic methods
setMethod("+", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
lcm_denom <- cpp_lcm(e1@denominator, e2@denominator)
new_numerator <- e1@numerator * (lcm_denom / e1@denominator) + e2@numerator * (lcm_denom / e2@denominator)
simplify(new("rational", numerator = new_numerator, denominator = lcm_denom))
})
setMethod("-", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
lcm_denom <- cpp_lcm(e1@denominator, e2@denominator)
new_numerator <- e1@numerator * (lcm_denom / e1@denominator) - e2@numerator * (lcm_denom / e2@denominator)
simplify(new("rational", numerator = new_numerator, denominator = lcm_denom))
})
setMethod("*", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
simplify(new("rational", numerator = e1@numerator * e2@numerator, denominator = e1@denominator * e2@denominator))
})
setMethod("/", signature(e1 = "rational", e2 = "rational"), function(e1, e2) {
if (e2@numerator == 0) stop("Cannot divide by zero.")
simplify(new("rational", numerator = e1@numerator * e2@denominator, denominator = e1@denominator * e2@numerator))
})rational class to create three objects:# Create instances of Rational numbers
r1 <- rational(24, 6)
r2 <- rational(7, 230)
r3 <- rational(0, 4)
print(c(r1, r2, r3))[[1]]
24 / 6
[[2]]
7 / 230
[[3]]
0 / 4
r124 / 6
r30 / 4
r1 + r2927 / 230
r1 - r2913 / 230
r1 * r214 / 115
r1 / r2920 / 7
r1 + r34 / 1
r1 * r30 / 1
r2 / r3Error in r2/r3: Cannot divide by zero.
quotient(r1)[1] "4.00"
[1] 4
quotient(r2)[1] "0.03"
[1] 0.03043478
quotient(r2, digit = 3)[1] "0.030"
[1] 0.03043478
quotient(r2, digit = 3.14)Error in quotient(r2, digit = 3.14): Digits must be a non-negative number.
quotient(r2, digit = "avocado")Error in digit%%1: non-numeric argument to binary operator
q2 <- quotient(r2, digit = 3)[1] "0.030"
q2[1] 0.03043478
quotient(r3)[1] "0.00"
[1] 0
simplify(r1)4 / 1
simplify(r2)7 / 230
simplify(r3)0 / 1
r4 <- rational(24, 0)Error in validObject(.Object): invalid class "rational" object: Denominator cannot be zero.
Does the distribution of genre of sales across years appear to change?
# Import data
art_sales <- read.csv("/Users/nynn/Library/CloudStorage/OneDrive-Umich/Umich course/2024_Fall/Stats 506/Stats_506_PS/Stats_506_PS4/df_for_ml_improved_new_market.csv")
# Load necessary libraries
library(tidyverse)── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.1 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Summarize data by year and genre, including NAs/no info as others
genre_summary <- art_sales %>%
group_by(year) %>%
summarise(
Photography = sum(Genre___Photography, na.rm = TRUE),
Print = sum(Genre___Print, na.rm = TRUE),
Sculpture = sum(Genre___Sculpture, na.rm = TRUE),
Painting = sum(Genre___Painting, na.rm = TRUE),
Other = sum(Genre___Photography == 0 & Genre___Print == 0 &
Genre___Sculpture == 0 & Genre___Painting == 0)
)
# Reshape data for Plotly
genre_long <- genre_summary %>%
pivot_longer(cols = c(Photography, Print, Painting, Sculpture, Other),
names_to = "Genre",
values_to = "Count") %>%
group_by(year) %>%
mutate(Percentage = round(Count / sum(Count) * 100, 1))
# Absolute number plot
absolute_plot <- plot_ly(genre_long, x = ~as.factor(year), y = ~Count, color = ~Genre, type = "bar") %>%
layout(
title = "Distribution of Genre Over Years - Absolute Number",
xaxis = list(title = "Year"),
yaxis = list(title = "Number of Sales"),
barmode = "stack",
legend = list(title = list(text = "Genre"))
)
absolute_plot# Percentage plot (stacked to 100%)
percentage_plot <- plot_ly(genre_long, x = ~as.factor(year), y = ~Percentage, color = ~Genre, type = "bar", text = ~Genre) %>%
layout(
title = "Distribution of Genre Over Years - Percentage",
xaxis = list(title = "Year"),
yaxis = list(title = "Percentage of Sales", ticksuffix = "%"),
barmode = "stack",
legend = list(title = list(text = "Genre"))
)
percentage_plotIs there a change in the sales price in USD over time?
How does the genre affect the change in sales price over time?
# Prepare data with median and IQR
df_yearly_price <- art_sales %>%
group_by(year) %>%
summarise(
median_price_usd = median(price_usd, na.rm = TRUE),
lower_q = quantile(price_usd, 0.25, na.rm = TRUE), # 25th percentile
upper_q = quantile(price_usd, 0.75, na.rm = TRUE) # 75th percentile
)
# Create the interactive Plotly plot
plot <- plot_ly(df_yearly_price, x = ~year) %>%
# Add ribbon for IQR
add_ribbons(ymin = ~lower_q, ymax = ~upper_q, name = "IQR (25th - 75th Percentile)",
fillcolor = 'rgba(173, 216, 230, 0.4)', line = list(color = 'transparent')) %>%
# Add median line
add_lines(y = ~median_price_usd, name = "Median Price", line = list(color = 'blue', width = 2)) %>%
# Add median points
add_markers(y = ~median_price_usd, name = "Median Price", marker = list(color = 'blue', size = 6)) %>%
# Layout for titles and styling
layout(
title = "Sales Price Distribution Over Time with IQR",
xaxis = list(title = "Year"),
yaxis = list(title = "Sales Price (USD)"),
legend = list(title = list(text = "Legend")),
hovermode = "x unified" # Display hover info for all elements together
)
plot# Create a dataset for genre
art_sales_genre <- art_sales %>%
mutate(Genre = case_when(
Genre___Photography == 1 ~ "Photography",
Genre___Print == 1 ~ "Print",
Genre___Sculpture == 1 ~ "Sculpture",
Genre___Painting == 1 ~ "Painting",
TRUE ~ "Other"
)) %>%
select(year, price_usd, Genre)
art_sales_genre$Genre <- factor(art_sales_genre$Genre, levels = c("Painting", "Photography", "Print", "Sculpture", "Other"))
# Calculate IQR (Interquartile Range) for each genre and year
genre_price_summary <- art_sales_genre %>%
group_by(year, Genre) %>%
summarise(
median_price = median(price_usd, na.rm = TRUE),
lower_q = quantile(price_usd, 0.25, na.rm = TRUE), # 25th percentile
upper_q = quantile(price_usd, 0.75, na.rm = TRUE) # 75th percentile
)`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
# Generate a list of plotly plots for each genre with IQR ribbons and median lines
plots <- genre_price_summary %>%
group_by(Genre) %>%
do(plot = plot_ly(data = ., x = ~year) %>%
add_ribbons(
ymin = ~lower_q,
ymax = ~upper_q,
fillcolor = ~Genre,
name = "IQR",
showlegend = FALSE,
opacity = 0.5
) %>%
add_lines(
y = ~median_price,
color = ~Genre,
name = ~Genre,
line = list(width = 3),
hoverinfo = "text",
text = ~paste("Year:", year,
"<br>Median Price:", round(median_price, 2),
"<br>Genre:", Genre)
) %>%
layout(
title = list(text = ~unique(Genre), xref = "paper"),
xaxis = list(title = "Year"),
yaxis = list(title = "Median Sales Price (USD)")
))
# Use subplot to create a faceted plot with each genre in a separate facet, similar to ggplot's facet_wrap
final_plot <- subplot(plots$plot, nrows = 2, shareX = TRUE, titleX = TRUE)
# Display the final interactive plot with a shared legend at the top
final_plot %>%
layout(
title = "Sales Price Over Time by Genre with IQR",
legend = list(orientation = "h", x = 0.5, xanchor = "center"),
margin = list(t = 50)
)# Load necessary libraries
library(data.table)
Attaching package: 'data.table'
The following objects are masked from 'package:lubridate':
hour, isoweek, mday, minute, month, quarter, second, wday, week,
yday, year
The following objects are masked from 'package:dplyr':
between, first, last
The following object is masked from 'package:purrr':
transpose
library(nycflights13)
# Convert to data.table format
flights_dt <- as.data.table(flights)
airports_dt <- as.data.table(airports)
# Departure delays
departure_delays <- flights_dt[
, .(mean_delay = mean(dep_delay, na.rm = TRUE),
med_delay = median(dep_delay, na.rm = TRUE),
numflights = .N), by = origin
][numflights >= 10][order(-mean_delay)]
# Join with airports data to get airport names
departure_delays <- departure_delays[
airports_dt, on = .(origin = faa), nomatch = 0
][, .(name, mean_delay, med_delay)][order(-mean_delay)]
# Display the result for departure delays
print(departure_delays) name mean_delay med_delay
<char> <num> <num>
1: Newark Liberty Intl 15.10795 -1
2: John F Kennedy Intl 12.11216 -1
3: La Guardia 10.34688 -3
# Arrival delays
arrival_delays <- flights_dt[
, .(mean_delay = mean(arr_delay, na.rm = TRUE),
med_delay = median(arr_delay, na.rm = TRUE),
numflights = .N), by = dest
][numflights >= 10][order(-mean_delay)]
# Join with airports data to get airport names
arrival_delays <- airports_dt[
arrival_delays, on = .(faa = dest)][, name := coalesce(name, faa)][, .(name, mean_delay, med_delay)][order(-mean_delay)]
# Display all rows for arrival delays
print(arrival_delays) name mean_delay med_delay
<char> <num> <num>
1: Columbia Metropolitan 41.764151 28.0
2: Tulsa Intl 33.659864 14.0
3: Will Rogers World 30.619048 16.0
4: Jackson Hole Airport 28.095238 15.0
5: Mc Ghee Tyson 24.069204 2.0
---
98: Seattle Tacoma Intl -1.099099 -11.0
99: Honolulu Intl -1.365193 -7.0
100: STT -3.835907 -9.0
101: John Wayne Arpt Orange Co -7.868227 -11.0
102: Palm Springs Intl -12.722222 -13.5
planes_dt <- as.data.table(planes)
fastest_aircraft <- flights_dt[
planes_dt,
on = "tailnum",
.(model, mph = distance / (air_time / 60)), # Calculate speed in join
nomatch = NULL
][
, .(avgmph = mean(mph, na.rm = TRUE), nflights = .N), by = model # Aggregate by model
][order(-avgmph)][1] # Sort by avgmph and take top row
print(fastest_aircraft) model avgmph nflights
<char> <num> <int>
1: 777-222 482.6254 4